#Import Data from data
import pandas as pd
import numpy as np
import panel as pn
from panel.interact import interact
from panel import widgets
import plotly.express as px
pn.extension()
import hvplot.pandas
import pandasql as ps
import matplotlib.animation as animation
athlete_events = pd.read_csv('athlete_events.csv',index_col='ID')
athlete_events.head()
| Name | Sex | Age | Height | Weight | Team | NOC | Games | Year | Season | City | Sport | Event | Medal | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ID | ||||||||||||||
| 1 | A Dijiang | M | 24.0 | 180.0 | 80.0 | China | CHN | 1992 Summer | 1992 | Summer | Barcelona | Basketball | Basketball Men's Basketball | NaN |
| 2 | A Lamusi | M | 23.0 | 170.0 | 60.0 | China | CHN | 2012 Summer | 2012 | Summer | London | Judo | Judo Men's Extra-Lightweight | NaN |
| 3 | Gunnar Nielsen Aaby | M | 24.0 | NaN | NaN | Denmark | DEN | 1920 Summer | 1920 | Summer | Antwerpen | Football | Football Men's Football | NaN |
| 4 | Edgar Lindenau Aabye | M | 34.0 | NaN | NaN | Denmark/Sweden | DEN | 1900 Summer | 1900 | Summer | Paris | Tug-Of-War | Tug-Of-War Men's Tug-Of-War | Gold |
| 5 | Christine Jacoba Aaftink | F | 21.0 | 185.0 | 82.0 | Netherlands | NED | 1988 Winter | 1988 | Winter | Calgary | Speed Skating | Speed Skating Women's 500 metres | NaN |
athlete_events.head(10)
| Name | Sex | Age | Height | Weight | Team | NOC | Games | Year | Season | City | Sport | Event | Medal | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ID | ||||||||||||||
| 1 | A Dijiang | M | 24.0 | 180.0 | 80.0 | China | CHN | 1992 Summer | 1992 | Summer | Barcelona | Basketball | Basketball Men's Basketball | NaN |
| 2 | A Lamusi | M | 23.0 | 170.0 | 60.0 | China | CHN | 2012 Summer | 2012 | Summer | London | Judo | Judo Men's Extra-Lightweight | NaN |
| 3 | Gunnar Nielsen Aaby | M | 24.0 | NaN | NaN | Denmark | DEN | 1920 Summer | 1920 | Summer | Antwerpen | Football | Football Men's Football | NaN |
| 4 | Edgar Lindenau Aabye | M | 34.0 | NaN | NaN | Denmark/Sweden | DEN | 1900 Summer | 1900 | Summer | Paris | Tug-Of-War | Tug-Of-War Men's Tug-Of-War | Gold |
| 5 | Christine Jacoba Aaftink | F | 21.0 | 185.0 | 82.0 | Netherlands | NED | 1988 Winter | 1988 | Winter | Calgary | Speed Skating | Speed Skating Women's 500 metres | NaN |
| 5 | Christine Jacoba Aaftink | F | 21.0 | 185.0 | 82.0 | Netherlands | NED | 1988 Winter | 1988 | Winter | Calgary | Speed Skating | Speed Skating Women's 1,000 metres | NaN |
| 5 | Christine Jacoba Aaftink | F | 25.0 | 185.0 | 82.0 | Netherlands | NED | 1992 Winter | 1992 | Winter | Albertville | Speed Skating | Speed Skating Women's 500 metres | NaN |
| 5 | Christine Jacoba Aaftink | F | 25.0 | 185.0 | 82.0 | Netherlands | NED | 1992 Winter | 1992 | Winter | Albertville | Speed Skating | Speed Skating Women's 1,000 metres | NaN |
| 5 | Christine Jacoba Aaftink | F | 27.0 | 185.0 | 82.0 | Netherlands | NED | 1994 Winter | 1994 | Winter | Lillehammer | Speed Skating | Speed Skating Women's 500 metres | NaN |
| 5 | Christine Jacoba Aaftink | F | 27.0 | 185.0 | 82.0 | Netherlands | NED | 1994 Winter | 1994 | Winter | Lillehammer | Speed Skating | Speed Skating Women's 1,000 metres | NaN |
# How trends have changed across the history for country
team_medal_year_count=ps.sqldf("select NOC as Team, Year, count(*) as medal_count from athlete_events where Season='Summer' and Medal='Gold' group by Team, Year order by year, medal_count desc")
team_medal_year_count['rank']=team_medal_year_count.groupby('Year')['medal_count'].rank(ascending=False).astype(int)
top_ten_country_by_year=team_medal_year_count[team_medal_year_count['rank'] < 21]
top_ten_country_by_year.head(20)
| Team | Year | medal_count | rank | |
|---|---|---|---|---|
| 0 | GER | 1896 | 24 | 1 |
| 1 | USA | 1896 | 11 | 2 |
| 2 | GRE | 1896 | 10 | 3 |
| 3 | FRA | 1896 | 5 | 4 |
| 4 | AUS | 1896 | 2 | 7 |
| 5 | AUT | 1896 | 2 | 7 |
| 6 | GBR | 1896 | 2 | 7 |
| 7 | GBR | 1896 | 2 | 7 |
| 8 | HUN | 1896 | 2 | 7 |
| 9 | DEN | 1896 | 1 | 10 |
| 10 | SUI | 1896 | 1 | 10 |
| 11 | FRA | 1900 | 22 | 1 |
| 12 | USA | 1900 | 18 | 2 |
| 13 | FRA | 1900 | 17 | 3 |
| 14 | SUI | 1900 | 13 | 4 |
| 15 | GBR | 1900 | 12 | 5 |
| 16 | GBR | 1900 | 11 | 6 |
| 17 | GBR | 1900 | 9 | 7 |
| 18 | USA | 1900 | 9 | 7 |
| 19 | GBR | 1900 | 7 | 9 |
fig=px.bar(top_ten_country_by_year, x="Team", y="medal_count", animation_frame="Year", animation_group="Team",range_x=[-1,21],range_y=[0,300])
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 6000
fig

usa_win_by_event=ps.sqldf("select Sport, Year, count(*) as medal_count from athlete_events where Season='Summer' and NOC='USA' group by Sport, Year order by year, medal_count desc")
usa_win_by_event['rank']=usa_win_by_event.groupby('Year')['medal_count'].rank(ascending=False).astype(int)
top_ten_usa_win_by_event=usa_win_by_event[usa_win_by_event['rank'] < 6]
top_ten_usa_win_by_event.head(20)
| Sport | Year | medal_count | rank | |
|---|---|---|---|---|
| 0 | Athletics | 1896 | 21 | 1 |
| 1 | Shooting | 1896 | 4 | 2 |
| 2 | Swimming | 1896 | 2 | 3 |
| 3 | Athletics | 1900 | 91 | 1 |
| 4 | Tennis | 1900 | 10 | 2 |
| 5 | Golf | 1900 | 9 | 3 |
| 6 | Rowing | 1900 | 9 | 3 |
| 7 | Polo | 1900 | 4 | 5 |
| 8 | Sailing | 1900 | 4 | 5 |
| 13 | Gymnastics | 1904 | 416 | 1 |
| 14 | Athletics | 1904 | 158 | 2 |
| 15 | Golf | 1904 | 100 | 3 |
| 16 | Archery | 1904 | 70 | 4 |
| 17 | Cycling | 1904 | 60 | 5 |
| 31 | Athletics | 1906 | 66 | 1 |
| 32 | Swimming | 1906 | 8 | 2 |
| 33 | Tennis | 1906 | 4 | 3 |
| 34 | Wrestling | 1906 | 2 | 4 |
| 35 | Diving | 1906 | 1 | 5 |
| 36 | Athletics | 1908 | 143 | 1 |
fig = px.bar(top_ten_usa_win_by_event, x='Year', y='medal_count',
hover_data=['Sport'], color='Sport')
fig.show()
